Josh Quan
“The simple graph has brought more information to the data analyst’s mind than any other device.”
— John Tukey
The “gg” in “ggplot2” stands for “Grammar of Graphics”.
Stanford Open Policing Project
## spc_tbl_ [114 × 7] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ state : chr [1:114] "CO" "CO" "CO" "CO" ...
## $ driver_race : chr [1:114] "white" "white" "white" "white" ...
## $ pre_legalization : logi [1:114] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ quarter : Date[1:114], format: "2011-02-15" "2011-05-15" ...
## $ search_rate : num [1:114] 0.00454 0.00402 0.00356 0.00373 0.00449 ...
## $ legalization_status: chr [1:114] "pre" "pre" "pre" "pre" ...
## $ search_rate_100 : num [1:114] 0.454 0.402 0.356 0.373 0.449 ...
## - attr(*, "spec")=
## .. cols(
## .. state = col_character(),
## .. driver_race = col_character(),
## .. pre_legalization = col_logical(),
## .. quarter = col_date(format = ""),
## .. search_rate = col_double(),
## .. legalization_status = col_character(),
## .. search_rate_100 = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
## # A tibble: 6 × 7
## state driver_race pre_legalization quarter search_rate legalizati…¹ searc…²
## <chr> <chr> <lgl> <date> <dbl> <chr> <dbl>
## 1 CO white TRUE 2011-02-15 0.00454 pre 0.454
## 2 CO white TRUE 2011-05-15 0.00402 pre 0.402
## 3 CO white TRUE 2011-08-15 0.00356 pre 0.356
## 4 CO white TRUE 2011-11-15 0.00373 pre 0.373
## 5 CO white TRUE 2012-02-15 0.00449 pre 0.449
## 6 CO white TRUE 2012-05-15 0.00450 pre 0.450
## # … with abbreviated variable names ¹legalization_status, ²search_rate_100
ggplot(data = stops, aes(x = quarter, y = search_rate_100, color = driver_race)) +
geom_smooth(method = "loess")ggplot(data = stops, aes(x = quarter, y = search_rate_100, color = driver_race)) +
geom_smooth(method = "loess", se = FALSE)ggplot(data = stops, aes(x = quarter, y = search_rate_100, color = driver_race)) +
geom_smooth(method = "loess", se = FALSE) +
scale_color_viridis_d()ggplot(data = stops, aes(x = quarter, y = search_rate_100, color = driver_race)) +
geom_smooth(method = "loess", se = FALSE) +
scale_color_viridis_d() +
theme_minimal()ggplot(data = stops, aes(x = quarter, y = search_rate_100, color = driver_race)) +
geom_smooth(method = "loess", se = FALSE) +
scale_color_viridis_d() +
theme_minimal() +
labs(x = "Year", y = "Search Rate", color = "Driver Race",
title = "Washington Highway Patrol Searches", subtitle = "Searches Per Hundred stops")ggplot(data = <DATA>) +
<GEOM_FUNCTION>(mapping = aes(<MAPPINGS>))
ggplot(data = stops, aes(x = quarter, y = search_rate_100, size = search_rate_100)) +
geom_point(alpha = 0.5)
Exercise: Using the information at https://ggplot2.tidyverse.org/articles/ggplot2-specs.html, add color, size, alpha, and shape aesthetics to your graph. Experiment. Do different things happen when you map aesthetics to discrete versus continuous variables? What happens when you use more than one aesthetic?
geom levelgeomsggplot(data = stops, mapping = aes(x = quarter, y = search_rate_100)) +
geom_point() +
geom_smooth(aes(color = driver_race), method = "loess", se = FALSE)aes()ggplot(data = stops,
mapping = aes(x = quarter,
y = search_rate_100,
color = driver_race)) +
geom_point() aes()ggplot(data = stops,
mapping = aes(x = quarter,
y = search_rate_100)) +
geom_point(color = "#63B3E8")
Exercise: What is wrong with the following?
stops %>%
ggplot(aes(x = quarter, y = search_rate_100, color = legalization_status)) %>%
geom_point()
## Error in `geom_point()`:
## ! `mapping` must be created by `aes()`.
## ℹ Did you use `%>%` or `|>` instead of `+`?
ggplot(data = stops, aes(x = quarter, y = search_rate_100, color = driver_race)) +
geom_point() +
geom_line()ggplot(data = stops, aes(x = quarter, y = search_rate_100, color = driver_race)) +
geom_smooth(span = 0.2, se = FALSE)ggplot(data = stops, aes(x = quarter, y = search_rate_100, color = driver_race)) +
geom_point(data = filter(stops, search_rate_100 < .2),
size = 5, color = "gray") +
geom_point()
Exercise: Work with your neighbor to sketch what the following plots will look like. No cheating! Do not run the code; for now, just think it through.
ggplot(stops, aes(x = quarter, y = search_rate_100, color = driver_race)) +
geom_point() +
geom_point(data = pre_legalization_high, size = 5, color = "gray")ggplot(stops, aes(x = quarter, y = search_rate_100, color = driver_race)) +
geom_point(data = pre_legalization_high, size = 5, color = "gray") +
geom_point()ggplot(stops, aes(x = quarter, y = search_rate_100, color = driver_race)) +
geom_point(data = pre_legalization_high, size = 5, color = "gray") +
geom_point() +
geom_text(data = pre_legalization_high, aes(y = search_rate_100, label = search_rate_100),
size = 2, color = "black")ggplot(stops, aes(x = quarter, y = search_rate_100, color = driver_race)) +
geom_point(data = pre_legalization_high, size = 5, color = "gray") +
geom_point() +
geom_text(data = pre_legalization_high, aes(y = search_rate_100 + .05, label = search_rate_100),
size = 2, color = "black")ggplot(stops, aes(x = quarter, y = search_rate_100, color = driver_race)) +
geom_point(data = pre_legalization_high, size = 5, color = "gray") +
geom_point() +
geom_text_repel(data = pre_legalization_high,
aes(x = quarter, y = search_rate_100,
label = as.character(quarter)),
size = 3, color = "black")ggplot(stops, aes(x = quarter, y = search_rate_100, color = driver_race)) +
geom_point(data = pre_legalization_high, size = 5, color = "gray") +
geom_point() +
geom_label_repel(data = pre_legalization_high,
aes(x = quarter, y = search_rate_100,
label = as.character(quarter)),
size = 3, color = "black")ggplot(stops, aes(x = quarter, y = search_rate_100, color = driver_race)) +
scale_color_manual(values = c("#FF6EB4", "#00BFFF", "#008B8B")) +
geom_smooth(se = FALSE)ggplot(data = stops, aes(x = quarter, y = search_rate_100)) +
geom_smooth() +
facet_wrap( ~ driver_race)ggplot(data = stops, aes(x = quarter, y = search_rate_100)) +
geom_line() +
facet_grid(state ~ driver_race)ggplot(data = stops, aes(x = quarter, y = search_rate_100)) +
geom_line() +
facet_grid(driver_race ~ state)ggplot(data = stops, aes(x = quarter, y = search_rate_100, color = driver_race)) +
geom_point() +
scale_y_reverse()ggplot(data = stops, aes(x = quarter, y = search_rate_100, color = driver_race)) +
geom_point() +
scale_y_sqrt()ggplot(data = stops, aes(x = quarter, y = search_rate_100, color = driver_race)) +
geom_point() +
scale_y_continuous(breaks = c(0, 0.25, 0.5, .75, 1.0))ggplot(data = stops, aes(x = quarter, y = search_rate_100, color = driver_race)) +
geom_point() +
theme_bw()ggplot(data = stops, aes(x = quarter, y = search_rate_100, color = driver_race)) +
geom_point() +
theme_dark() ggplot(data = stops, aes(x = quarter, y = search_rate_100, color = driver_race)) +
geom_point() +
theme(axis.text.x = element_text(angle = 90))wa_stops <- stops %>% filter(state == "WA") %>%
ggplot(aes(x = quarter, y = search_rate_100, color = driver_race)) +
geom_smooth(se = FALSE) +
labs(title = "Washington")
co_stops <- stops %>% filter(state == "CO") %>%
ggplot(aes(x = quarter, y = search_rate_100, color = driver_race)) +
geom_smooth(se = FALSE) +
labs(title = "Colorado") +
theme(legend.position = "none")
Starter code:
stops %>% filter(state == "WA") %>%
ggplot(aes(quarter, search_rate_100, color = driver_race)) +
geom_point() +
geom_smooth(method = lm, se = FALSE)
‘?labs’ controls the title, subtitle, caption, etc.
‘?scale_color_manual’ lets you specify your own colors for the levels of a variable.
‘?geom_vline’ draws a vertical line across the plot. (Hint: the x-axis is a date data type, so the intercept must be given as a date as well.)
‘?theme’ controls the non-data elements of the plot, such as the size of text and the angle of axis tick labels.
‘?annotate’ creates a text annotation layer. The same trick with coordinates applies as for geom_vline.
Experiment with themes
To really master themes, see:
https://ggplot2.tidyverse.org/articles/extending-ggplot2.html#creating-your-own-theme
Make any plot by filling in the parameters of this template